Group 14 - Project FP01¶

Time series anomaly detection - DAGMM¶

In [ ]:
import os

# Disable TensorFlow's oneDNN custom operations. TensorFlow picks this flag up
# when the library is loaded, so it has to be in the environment *before* the
# first `import tensorflow`; setting it in a later cell is too late.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

# Project helper modules (data loading, plotting, metrics, model utilities).
# NOTE(review): star imports hide where names such as get_dataframes or
# get_statistics come from; consider explicit imports once the set of used
# names is known.
from dataset import *
from plots import *
from metrics import *
from models_functions import *

# Set style for matplotlib
plt.style.use("Solarize_Light2")

# Kept after the star imports so this binding of `pio` is the one that wins.
import plotly.io as pio
pio.renderers.default = "notebook_connected"
In [ ]:
import os

# TF_ENABLE_ONEDNN_OPTS=0 tells TensorFlow not to use the oneDNN library for
# optimization.
# NOTE(review): presumably this only has an effect when it is set before
# TensorFlow is first imported - confirm against the import order of the notebook.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

# Root directories of the dataset: normal recordings and collision recordings
ROOTDIR_DATASET_ANOMALY = '../dataset/collisions'
ROOTDIR_DATASET_NORMAL = '../dataset/normal'

Various parameters¶

In [ ]:
# Frequency setting, kept as a string; it is passed to get_dataframes and
# get_statistics and is also used to build the feature-folder names.
# Other values that were tried: '1.0', '0.1', '0.01'
freq = '0.005'
freq_str = "_".join(freq.split("."))  # '0.005' -> '0_005'

# File-name fragments handed to get_dataframes for each dataset
file_name_collisions = "_collision_20220811_rbtc_"
file_name_normal = "_20220811_rbtc_"

# Recordings selected from each dataset
recording_collisions = [1, 5]
recording_normal = [0, 2, 3, 4]

# Feature folders, one per dataset, suffixed with the frequency
features_folder_normal = "./features/normal" + freq_str + "/"
features_folder_collisions = "./features/collisions" + freq_str + "/"

Data¶

In [ ]:
# Build the feature / raw dataframes for every dataset variant used below.
# The last argument is None in all four calls.

# Normal recordings (the third return value is not needed here)
df_features_normal, df_normal_raw, _ = get_dataframes(
    ROOTDIR_DATASET_NORMAL, file_name_normal, recording_normal, freq, None
)

# Collision recordings 1 and 5 together
df_features_collisions, df_collisions_raw, df_collisions_raw_action = get_dataframes(
    ROOTDIR_DATASET_ANOMALY, file_name_collisions, recording_collisions, freq, None
)

# Collision recording 1 only
df_features_collisions_1, df_collisions_raw_1, df_collisions_raw_action_1 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY, file_name_collisions, [1], freq, None
)

# Collision recording 5 only
df_features_collisions_5, df_collisions_raw_5, df_collisions_raw_action_5 = get_dataframes(
    ROOTDIR_DATASET_ANOMALY, file_name_collisions, [5], freq, None
)
Loading data.
Found 31 different actions.
Loading data done.

Computing features.

Progress: 0% Complete

0

Skipped feature extraction for pickFromPallet(1,2)=[true,1,0] 2022-08-11 14:37:37.436000 : 2022-08-11 14:37:37.421000.
Skipped feature extraction for placeToPallet(1,1)=[true,0] 2022-08-11 14:37:37.421000 : 2022-08-11 14:37:37.442000.
Skipped feature extraction for pickFromPallet(3,2)=[true,1,0] 2022-08-11 15:36:32.568000 : 2022-08-11 15:36:32.533000.
Skipped feature extraction for pickFromPallet(3,2)=[true,1,0] 2022-08-11 15:36:32.572000 : 2022-08-11 15:36:32.561000.
Skipped feature extraction for placeToPallet(1,3)=[true,0] 2022-08-11 15:36:32.533000 : 2022-08-11 15:36:32.572000.
Skipped feature extraction for placeToPallet(1,3)=[true,0] 2022-08-11 15:36:32.561000 : 2022-08-11 15:36:32.561000.
--- 110.06365871429443 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Computing features.

Progress: 0% Complete

0

Skipped feature extraction for moveOverPallet(1,3)=[true,0] 2022-08-11 16:55:15.149000 : 2022-08-11 16:55:15.146000.
Skipped feature extraction for moveOverPallet(3,1)=[true,0] 2022-08-11 16:55:15.146000 : 2022-08-11 16:55:15.150000.
--- 45.07735848426819 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Computing features.

Progress: 0% Complete

0

--- 21.245415687561035 seconds ---
Loading data.
Found 31 different actions.
Loading data done.

Computing features.

Progress: 0% Complete

0

Skipped feature extraction for moveOverPallet(1,3)=[true,0] 2022-08-11 16:55:15.149000 : 2022-08-11 16:55:15.146000.
Skipped feature extraction for moveOverPallet(3,1)=[true,0] 2022-08-11 16:55:15.146000 : 2022-08-11 16:55:15.150000.
--- 19.005630254745483 seconds ---
In [ ]:
# Disabled alternative to the data-loading cell above: the same four
# get_dataframes calls, but with a features folder path as the last argument
# instead of None.
# NOTE(review): presumably this makes get_dataframes use features stored in
# that folder rather than recomputing them - confirm in dataset.get_dataframes
# before enabling. Run either this cell or the one above, not both.
# df_features_normal, df_normal_raw, _ = get_dataframes(ROOTDIR_DATASET_NORMAL, file_name_normal, recording_normal, freq, f"{features_folder_normal}")
# df_features_collisions, df_collisions_raw, df_collisions_raw_action = get_dataframes(ROOTDIR_DATASET_ANOMALY, file_name_collisions, recording_collisions, freq, f"{features_folder_collisions}1_5/")
# df_features_collisions_1, df_collisions_raw_1, df_collisions_raw_action_1 = get_dataframes(ROOTDIR_DATASET_ANOMALY, file_name_collisions, [1], freq, f"{features_folder_collisions}1/")
# df_features_collisions_5, df_collisions_raw_5, df_collisions_raw_action_5 = get_dataframes(ROOTDIR_DATASET_ANOMALY, file_name_collisions, [5], freq, f"{features_folder_collisions}5/")
In [ ]:
# Train/test data for each collision set. The normal features are the same in
# all three calls; only the collision features change.

# Recordings 1 and 5 together
X_train, y_train, X_test, y_test, df_test = get_train_test_data(
    df_features_normal, df_features_collisions, full_normal=True
)

# Recording 1 only
X_train_1, y_train_1, X_test_1, y_test_1, df_test_1 = get_train_test_data(
    df_features_normal, df_features_collisions_1, full_normal=True
)

# Recording 5 only
X_train_5, y_train_5, X_test_5, y_test_5, df_test_5 = get_train_test_data(
    df_features_normal, df_features_collisions_5, full_normal=True
)

Collisions¶

In [ ]:
# Collision data for recordings 1 and 5 (each call returns a pair of dataframes)
collisions_rec1, collisions_init1 = get_collisions('1', ROOTDIR_DATASET_ANOMALY)
collisions_rec5, collisions_init5 = get_collisions('5', ROOTDIR_DATASET_ANOMALY)

# Stack the two recordings into one dataframe of each kind
# (recording 1 first, then recording 5)
all_rec_frames = [collisions_rec1, collisions_rec5]
all_init_frames = [collisions_init1, collisions_init5]
collisions_rec = pd.concat(all_rec_frames)
collisions_init = pd.concat(all_init_frames)
In [ ]:
# Collision zones and labels for each collision set, computed from the
# collision dataframes and the matching collision features.

# Recordings 1 and 5 together
collisions_zones, y_collisions = get_collisions_zones_and_labels(
    collisions_rec, collisions_init, df_features_collisions
)

# Recording 1 only
collisions_zones_1, y_collisions_1 = get_collisions_zones_and_labels(
    collisions_rec1, collisions_init1, df_features_collisions_1
)

# Recording 5 only
collisions_zones_5, y_collisions_5 = get_collisions_zones_and_labels(
    collisions_rec5, collisions_init5, df_features_collisions_5
)

DAGMM for Anomaly Detection in Time Series Data¶

In [ ]:
from algorithms.dagmm import DAGMM

# Settings for the LSTM autoencoder inside DAGMM
lstm_autoencoder_args = {
    'n_layers': (4, 4),
    'use_bias': (True, True),
    'dropout': (0.1, 0.1)
}

# Hyper-parameters, forwarded unchanged to the DAGMM constructor
dagmm_params = dict(
    num_epochs=10,
    lambda_energy=0.1,
    lambda_cov_diag=0.005,
    lr=1e-4,
    batch_size=32,
    gmm_k=5,
    normal_percentile=80,
    sequence_length=30,
    autoencoder_type=DAGMM.AutoEncoder.LSTM,  # Using LSTM autoencoder
    hidden_size=32,
    autoencoder_args=lstm_autoencoder_args,
    seed=42,
    gpu=None,  # Set to None for CPU, or specify GPU index if available
    details=True,
)
classifier = DAGMM(**dagmm_params)

# Fit the DAGMM on the training split (normal data)
classifier.fit(X_train)
print("DAGMM training completed.")
100%|██████████| 10/10 [00:31<00:00,  3.12s/it]
DAGMM training completed.

Predictions¶

In [ ]:
# Run the trained classifier on each test set and collect the statistics,
# using the "mad" threshold type in all three cases.
# NOTE: every df_test* variable is passed in and then rebound to the result,
# so re-running this cell feeds the previous result back into get_statistics.

# Recordings 1 and 5 together
df_test = get_statistics(
    X_test, y_collisions, classifier, df_test, freq, threshold_type="mad"
)

# Recording 1 only
df_test_1 = get_statistics(
    X_test_1, y_collisions_1, classifier, df_test_1, freq, threshold_type="mad"
)

# Recording 5 only
df_test_5 = get_statistics(
    X_test_5, y_collisions_5, classifier, df_test_5, freq, threshold_type="mad"
)
Anomaly prediction completed.
Number of anomalies detected: 5 with threshold 25.04360360956374, std
Number of anomalies detected: 8 with threshold 22.712437048157057, mad
Number of anomalies detected: 16 with threshold 21.815066162745158, percentile
Number of anomalies detected: 0 with threshold 42.90918945046632, IQR
Number of anomalies detected: 169 with threshold 0.0, zero

choosen threshold type: mad, with value: 22.7124
F1 Score: 0.0177
Accuracy: 0.6373
Precision: 0.1250
Recall: 0.0095
              precision    recall  f1-score   support

           0       0.65      0.97      0.78       201
           1       0.12      0.01      0.02       105

    accuracy                           0.64       306
   macro avg       0.39      0.49      0.40       306
weighted avg       0.47      0.64      0.52       306

ROC AUC Score: 0.5367
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 8
Best threshold: -11.9408 | F1 Score: 0.5215 | Precision: 0.3633 | Recall: 0.9238
Anomalies detected with best threshold: 267

	-------------------------------------------------------------------------------------

Anomaly prediction completed.
Number of anomalies detected: 0 with threshold 34.76196224936218, std
Number of anomalies detected: 0 with threshold 29.971444313062563, mad
Number of anomalies detected: 9 with threshold 27.627885636829195, percentile
Number of anomalies detected: 0 with threshold 61.252890429397425, IQR
Number of anomalies detected: 97 with threshold 0.0, zero

choosen threshold type: mad, with value: 29.9714
F1 Score: 0.0000
Accuracy: 0.7866
Precision: 0.0000
Recall: 0.0000
              precision    recall  f1-score   support

           0       0.79      1.00      0.88       129
           1       0.00      0.00      0.00        35

    accuracy                           0.79       164
   macro avg       0.39      0.50      0.44       164
weighted avg       0.62      0.79      0.69       164

ROC AUC Score: 0.6303
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 0
Best threshold: 14.1643 | F1 Score: 0.4301 | Precision: 0.3448 | Recall: 0.5714
Anomalies detected with best threshold: 58

	-------------------------------------------------------------------------------------

Anomaly prediction completed.
Number of anomalies detected: 0 with threshold 12.892018650448753, std
Number of anomalies detected: 0 with threshold 8.626876203219096, mad
Number of anomalies detected: 8 with threshold 6.2170033405224485, percentile
Number of anomalies detected: 0 with threshold 25.798544782400132, IQR
Number of anomalies detected: 63 with threshold 0.0, zero

choosen threshold type: mad, with value: 8.6269
F1 Score: 0.0000
Accuracy: 0.6028
Precision: 0.0000
Recall: 0.0000
              precision    recall  f1-score   support

           0       0.60      1.00      0.75        85
           1       0.00      0.00      0.00        56

    accuracy                           0.60       141
   macro avg       0.30      0.50      0.38       141
weighted avg       0.36      0.60      0.45       141

ROC AUC Score: 0.5731
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Anomalies detected: 0
Best threshold: -15.6667 | F1 Score: 0.6171 | Precision: 0.4538 | Recall: 0.9643
Anomalies detected with best threshold: 119

	-------------------------------------------------------------------------------------

In [ ]:
# True collision zones vs. predicted anomalies, recordings 1 and 5 together
plot_anomalies_true_and_predicted(
    df_collisions_raw,
    df_collisions_raw_action,
    collisions_zones,
    df_test,
    title="Collisions zones vs predicted zones for both recordings",
)
In [ ]:
# True collision zones vs. predicted anomalies, recording 1 only
plot_anomalies_true_and_predicted(
    df_collisions_raw_1,
    df_collisions_raw_action_1,
    collisions_zones_1,
    df_test_1,
    title="Collisions zones vs predicted zones for recording 1",
)
In [ ]:
# True collision zones vs. predicted anomalies, recording 5 only
plot_anomalies_true_and_predicted(
    df_collisions_raw_5,
    df_collisions_raw_action_5,
    collisions_zones_5,
    df_test_5,
    title="Collisions zones vs predicted zones for recording 5",
)